import pandas as pd


"""
This script combines the session data with the post-questionnaire data into a single table that is easier to work with.


@author: Jonathan van Oudheusden
@date: 2024-01-28

Required files (read from Givenfiles/Data/):
    - sessionsdata_anonym.csv
    - prolific_profile_anonym.csv
    - postquestionnaire_anonym.csv

    
Output files:
    - ProcessedSessionsData.xlsx
"""

# Locations of the session and user-profile input CSV files.
filePathSession = 'Givenfiles/Data/sessionsdata_anonym.csv'
filePathUser = 'Givenfiles/Data/prolific_profile_anonym.csv'

# Load both tables up front (session data first, then the user profiles).
# NOTE(review): dfUser is not referenced again in the visible part of this script.
dfSession, dfUser = (
    pd.read_csv(csv_path) for csv_path in (filePathSession, filePathUser)
)

# Lookup table from a cluster index (kept as a string key) to its label.
# Indices '1' through '5' all share the "preparatory" label.
activityDict = {str(index): "preparatory" for index in range(1, 6)}
activityDict.update({
    '6': "Self-efficacy",
    '7': "Practical knowledge",
    '8': "Awareness of positive outcomes",
    '9': "Awareness of negative outcomes",
    '10': "Motivation to change",
    '11': "Knowledge of how to maintain/achieve mental well-being",
    '12': "Mindset that physical activity helps to quit smoking",
    '13': "Awareness of smoking patterns",
    '14': "Knowledge of how to maintain/achieve well-being",
})


# Rows whose `condition_column` entry equals `target_value` are the ones whose
# 'response_value' is translated into an activity label.
condition_column, target_value = "response_type", "cluster_new_index"

# NOTE(review): sessions_list is not referenced anywhere in the visible script.
sessions_list = list()


def map_values(row):
    """Return the row's 'response_value', translated via activityDict when applicable.

    Only rows whose `condition_column` entry equals `target_value` are
    translated. A value without an entry in activityDict, and the value of
    any other row, is returned unchanged.
    """
    # Guard clause: rows of any other response type pass through untouched.
    if row[condition_column] != target_value:
        return row['response_value']

    raw_value = row['response_value']
    return activityDict.get(raw_value, raw_value)
    
# Translate cluster indices into activity labels, but only on the rows that
# actually hold a cluster index.
is_cluster_row = dfSession[condition_column] == target_value
cluster_values = dfSession.loc[is_cluster_row, 'response_value']
# Series.map yields NaN for every value that has no key in the dict. Fall back
# to the original value (the same behaviour as map_values) so that an
# unexpected cluster index is kept instead of being silently erased.
dfSession.loc[is_cluster_row, 'response_value'] = (
    cluster_values.map(activityDict).fillna(cluster_values)
)

# Reshape from long to wide: one row per (rand_id, session_num) and one column
# per response_type. When a combination occurs more than once, the first
# response_value encountered is kept.
pivot_df = dfSession.pivot_table(
    index=['rand_id', 'session_num'],
    columns='response_type',
    values='response_value',
    aggfunc=lambda x: x.iloc[0],
)


# Post-questionnaire columns that are left out of the combined table.
drop_columns = [
    'Finished',
    'Weekly_Exercise',
    'Godin_exercise_11',
    'Godin_exercise_8',
    'Godin_exercise_9',
    'Smoking_Freq',
    'Last_smoke',
    'Quitter_Self_Identit_1',
    'Quitter_Self_Identit_2',
    'Quitter_Self_Identit_3',
    'Quitter_Self_Identit_4',
    'AC2',
]

portquestionaire_df = (
    pd.read_csv('Givenfiles/Data/postquestionnaire_anonym.csv')
    # Tag every post-questionnaire row with session number 6.
    .assign(session_num=6)
    # Use the column names of the session data for the matching questions.
    .rename(columns={
        'Activity_experience': 'activity_experience_slot',
        'Effort_1': 'effort',
    })
    .drop(columns=drop_columns)
)

# Turn the (rand_id, session_num) index back into ordinary columns so that
# both tables expose them as columns when they are stacked.
pivot_df = pivot_df.reset_index()

# Stack the session rows and the post-questionnaire rows into one table.
merged_df = pd.concat(
    [pivot_df, portquestionaire_df],
    ignore_index=True,
)

# Index by (rand_id, session_num) again so the Excel sheet comes out ordered;
# each cell keeps the first value found for that pair.
merged_df = merged_df.pivot_table(
    index=['rand_id', 'session_num'],
    aggfunc=lambda group: group.iloc[0],
)

merged_df.to_excel('ProcessedSessionsData.xlsx')
